#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division, with_statement
'''
Copyright 2015, 陈同 (chentong_biology@163.com).  
===========================================================
'''
__author__ = 'chentong & ct586[9]'
__author_email__ = 'chentong_biology@163.com'
#=========================================================
desc = '''
Program description:
    This is designed to summarize reads count output by `STAR`.

N_unmapped      397592  397592  397592
N_multimapping  2281519 2281519 2281519
N_noFeature     3391357 13297995        13358706
N_ambiguous     1200892 251064  223762
ENSG00000223972.5       0       0       0
ENSG00000227232.5       0       0       0
ENSG00000278267.1       0       0       0
ENSG00000243485.3       0       0       0

'''

import sys
import os
from json import dumps as json_dumps
from time import localtime, strftime 
timeformat = "%Y-%m-%d %H:%M:%S"
from optparse import OptionParser as OP
import re
from tools import *
#from multiprocessing.dummy import Pool as ThreadPool
import pandas as pd

#from bs4 import BeautifulSoup
# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so implicit
# str/unicode conversions use UTF-8.  Python 3 has no reload() builtin and
# already defaults to UTF-8, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Module-wide debug flag; main() overwrites it from the -D/--debug option.
debug = 0

def fprint(content):
    """Print *content* to standard output as JSON indented by one space.

    Args:
        content: Any object that ``json.dumps`` can serialise.

    Raises:
        TypeError: Propagated from ``json.dumps`` when *content* is not
            JSON-serialisable.
    """
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(json_dumps(content, indent=1))

def cmdparameter(argv):
    """Parse the command line and return ``(options, args)``.

    Args:
        argv (list): Full argument vector with the program name first
            (normally ``sys.argv``).

    Returns:
        tuple: ``(options, args)`` as produced by
        ``OptionParser.parse_args``.

    Raises:
        SystemExit: With status 1, after printing the program description
            (stderr) and the option help (stdout), when no arguments are
            given; with status 2 (via ``parser.error``) when ``-f`` is
            missing or empty.
    """
    usages = "%prog -f file"
    # Name the program after argv[0] so the help text matches the invoked
    # script even when argv is not sys.argv.
    prog = os.path.basename(argv[0]) if argv else None
    parser = OP(usage=usages, prog=prog)
    parser.add_option("-f", "--files", dest="filein",
        metavar="FILEIN", help="`,` or ` ` separated a list of files. *.Log.final.out generated by `STAR` during mapping")
    parser.add_option("-l", "--labels", dest="label",
        metavar="LABEL", help="`,` or ` ` separated a list of labels to label each file. It must have same order as files.")
    parser.add_option("-o", "--output-file", dest="out_file",
        help="The name of output file.")
    parser.add_option("-H", "--header", dest="header",
        default=0, type='int', help="Given <1> here to indicate the first line as header line/ Default <0> meaning no header line.")
    parser.add_option("-s", "--skip-lines", dest="skip_line",
        default=4, type='int',
        help="Given a number to skip the first n lines. Default n=4.")
    parser.add_option("-c", "--extract-cols", dest="col_number",
        default=2, type='int',
        help="Given a number to specify which column to extract. The "
             "first column will be used as index. Default <2> meaning the 2nd column.")
    parser.add_option("-v", "--verbose", dest="verbose",
        action="store_true", help="Show process information")
    parser.add_option("-D", "--debug", dest="debug",
        default=False, action="store_true", help="Debug the program")

    if len(argv) == 1:
        # No arguments at all: show the description and the help directly
        # instead of spawning `python <script> -h` through a shell, which
        # breaks on paths containing spaces and may pick another interpreter.
        sys.stderr.write(desc + "\n")
        parser.print_help()
        sys.exit(1)

    (options, args) = parser.parse_args(argv[1:])
    # Explicit check rather than `assert`, which is stripped under -O.
    if not options.filein:
        parser.error("A filename needed for -f")
    return (options, args)
#--------------------------------------------------------------------

def readAndMergeMatrix(fileL, labelL, header, skip_line, col_number):
    """Read one column from each table and merge them column-wise.

    Args:
        fileL (list): Paths of the tables to read with ``pd.read_table``.
        labelL (list): Column labels, one per file, in the same order as
            ``fileL``.
        header: Truthy when the first non-skipped line is a header line;
            falsy when the tables have no header.
        skip_line (int): Number of leading lines to skip in every file.
        col_number (int): 0-based position of the column to extract.
            Column 0 is always read and used as the row index.

    Returns:
        pandas.DataFrame: One column per file (named by its label), joined
        along ``axis=1`` with ``pd.concat``; the index is named ``"ENSG"``.

    Raises:
        ValueError: If ``fileL`` and ``labelL`` differ in length.
    """
    # zip() would silently drop the surplus files or labels; fail loudly.
    if len(fileL) != len(labelL):
        raise ValueError(
            "Number of files (%d) and labels (%d) must be the same"
            % (len(fileL), len(labelL)))

    # pandas expects a row number for the header, or None for "no header".
    header_row = 0 if header else None

    tmpL = []
    for path, label in zip(fileL, labelL):
        data = pd.read_table(path, header=header_row, index_col=0,
                skiprows=skip_line, usecols=[0, col_number])
        data.columns = [label]
        data.index.name = "ENSG"
        tmpL.append(data)
    return pd.concat(tmpL, axis=1)
#----------------------------------------------------------

def _splitList(text):
    """Split a `,`- or space-separated string into its non-empty items."""
    # `[, ]+` (not `[, ]*`): a pattern that can match the empty string makes
    # re.split cut between every character on Python 3.7+.
    return [item for item in re.split(r'[, ]+', text.strip()) if item]


def main():
    """Merge the selected column of every input table into one matrix.

    Reads the command line through ``cmdparameter``, merges the files with
    ``readAndMergeMatrix``, replaces missing values with 0, keeps only rows
    holding at least one value greater than 0, and writes the result as a
    tab-separated table to the ``-o`` file (standard output when ``-o`` is
    not given).

    Raises:
        SystemExit: If no labels were supplied with ``-l``.
    """
    options, _ = cmdparameter(sys.argv)
    #-----------------------------------
    if options.label is None:
        # Without this check the failure is an AttributeError on None.
        sys.exit("Labels needed for -l (one per file, same order as -f)")
    fileL = _splitList(options.filein)
    labelL = _splitList(options.label)
    op = options.out_file
    header = options.header
    skip_line = options.skip_line
    # The option is 1-based; pandas column positions are 0-based.
    col_number = options.col_number - 1
    global debug
    debug = options.debug
    #-----------------------------------
    mergeM = readAndMergeMatrix(fileL, labelL, header, skip_line, col_number)
    mergeM = mergeM.fillna(0)
    mergeM = mergeM.loc[(mergeM > 0).any(axis=1)]
    # str("\t") is a native string on both Python 2 (where unicode_literals
    # would otherwise make it unicode) and Python 3 (where bytes are rejected).
    # to_csv(None) would only return the text and discard it, so fall back
    # to standard output when no output file was requested.
    mergeM.to_csv(op if op else sys.stdout, sep=str("\t"))
#--------------------------------------------

if __name__ == '__main__':
    # Run the program, then append the command line and the start/end
    # timestamps to ./python.log.  Nothing is logged if main() raises
    # or exits.
    startTime = strftime(timeformat, localtime())
    main()
    endTime = strftime(timeformat, localtime())
    # `with` closes the log even if the write fails.
    with open('python.log', 'a') as fh:
        fh.write("%s\n\tRun time : %s - %s \n" %
                 (' '.join(sys.argv), startTime, endTime))
    ###---------profile the program---------
    #import profile
    #profile_output = sys.argv[0]+".prof.txt"
    #profile.run("main()", profile_output)
    #import pstats
    #p = pstats.Stats(profile_output)
    #p.sort_stats("time").print_stats()
    ###---------profile the program---------


